# Feel free to add dependencies, but make sure that they are included in environment.yml
#disable some annoying warnings
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)
# import builtins
import random
import os
from copy import copy
from itertools import product, chain
#plots the figures in place instead of a new window
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt
from altair import datum
alt.data_transformers.disable_max_rows()
import sklearn
from sklearn import manifold
from openTSNE import TSNE
from umap import UMAP
To be able to explore paths in a projected space, you need to pick a problem/algorithm/model that consists of multiple states that change iteratively.
Read in your data from a file or create your own data.
Document any data processing steps.
# Edge length of the square board.
# You can change this to 3, the data for it is already cached.
FIELD_SIZE = 4
# Number of tile-value bins: the empty bin (0) plus the powers of two 2**1 .. 2**(BIN_CNT-1).
BIN_CNT = FIELD_SIZE * (FIELD_SIZE - 1)
BINS = [0, *(1 << exponent for exponent in range(1, BIN_CNT))]
# CSV column names: one "cnt<value>" column per bin, one "a<index>" column per board cell.
BIN_ = [f"cnt{value}" for value in BINS]
FIELD_ = [f"a{cell}" for cell in range(FIELD_SIZE * FIELD_SIZE)]
FIELD_NAMES = ",".join(FIELD_)
BIN_NAMES = ",".join(BIN_)
# Strategy names; the position in this list is used by explore() to derive the look-ahead depth.
ALGO = ["random", "reward-greedy", "zeros-la-1", "zeros-la-2", "zeros-la-3"]
# Number of games played per strategy.
RUNS_PER_ALGO = 10
def classify_state(matrix):
    """Return how many entries of *matrix* are equal to zero.

    *matrix* must support element-wise comparison whose result has a
    ``.sum()`` method (e.g. a NumPy array); the count is returned as
    produced by that ``.sum()`` call.
    """
    empty_cells = matrix == 0
    return empty_cells.sum()
def look_forward(env, steps=4, anchor=True):
    """Score each of the four moves by the zero counts reachable after it.

    Every move is simulated on a copy of *env*. With ``steps == 0`` a move's
    score is simply ``classify_state`` of the resulting state. Otherwise the
    score is the sum of the scores of all follow-up moves (computed
    recursively with ``steps - 1``), each increased by the zero count of the
    state reached by this move.

    Args:
        env: environment exposing ``board`` and ``step(direction)``; ``step``
            must return a 4-tuple whose first element is the next state.
        steps: remaining look-ahead depth.
        anchor: when true (the top-level call) and ``steps`` is non-zero, a
            move whose resulting state equals the current board is scored
            ``-1`` so that it is never preferred.

    Returns:
        A list with one score per direction ``0..3``.
    """
    # Only the anchored (top-level) call compares against the current board,
    # so the recursive calls do not need their own copy of it.
    board = copy(env.board) if anchor else None
    scores = []
    for direction in range(4):
        # NOTE(review): copy() is shallow; this relies on env.step() replacing
        # rather than mutating state shared with the original env — confirm.
        cenv = copy(env)
        next_state, _reward, _done, _info = cenv.step(direction)
        if not steps:
            scores.append(classify_state(next_state))
        elif anchor and (board == next_state).all():
            # disable move if it doesn't change the field (invalid move)
            scores.append(-1)
        else:
            child_scores = look_forward(cenv, steps=steps - 1, anchor=False)
            zeros = classify_state(next_state)
            # Equivalent to adding `zeros` to every child score and summing,
            # without depending on list + NumPy-scalar broadcasting.
            scores.append(sum(child_scores) + len(child_scores) * zeros)
    return scores
# pool.map passes a single argument per task, so the parameters arrive as one tuple
def explore(params):
    """Play one game with the given strategy and record every visited state.

    Args:
        params: ``(algo, repeat_nr)`` tuple. ``algo`` must be one of the names
            in ``ALGO``; ``repeat_nr`` is only used to build the ``line``
            identifier ``"<algo>-<repeat_nr>"``.

    Returns:
        A list of comma-separated rows (strings without newline), one per
        move: cumulative reward, zero count, line id, algo, step, cumulative
        reward, zero count, sum of all cells, biggest cell, followed by the
        flattened board and the number of cells per value in ``BINS``. The
        first two fields repeat reward and zeros; they are the ``x``/``y``
        columns of the CSV header written by the calling script.
    """
    # Imported here so that the gym dependencies are only needed when traces
    # are actually generated, and so the function does not depend on another
    # cell having imported them first.
    import gym
    import gym_2048  # noqa: F401 -- presumably registers '2048-v0' on import; confirm

    algo, repeat_nr = params
    # The position in ALGO encodes the look-ahead depth ("zeros-la-N" is at index N + 1).
    lookahead = ALGO.index(algo) - 1
    states = []
    env = gym.make('2048-v0', width=FIELD_SIZE, height=FIELD_SIZE)
    total_reward = 0
    for step in range(9999999):
        if algo == "random":
            # All moves tie, so the random tie-break below picks uniformly.
            # This must be a flat list: a nested [[0]*4] skews the choice
            # towards move 0.
            rewards = [0] * 4
        elif algo == "reward-greedy":
            # immediate reward of each move, simulated on a copy of the env
            rewards = [copy(env).step(direction)[1] for direction in range(4)]
        else:
            rewards = look_forward(env, lookahead)
        rewards = np.asarray(rewards)
        # break ties between equally good moves at random
        best_move = np.random.choice(np.flatnonzero(rewards == rewards.max()))
        next_state, reward, done, info = env.step(best_move)
        total_reward += reward
        bin_cnt = [(next_state == i).sum() for i in BINS]
        zeros = classify_state(next_state)
        nsfl = next_state.flatten().tolist()
        state = [total_reward, zeros, f"{algo}-{repeat_nr}", algo, step, total_reward, zeros, sum(nsfl), max(nsfl)]
        state += nsfl + bin_cnt
        states.append(','.join(map(str, state)))
        if done:
            break
    return states
# Traces are cached on disk; they are only regenerated when the file is missing.
fn = f"data/2048/trace{RUNS_PER_ALGO}_{FIELD_SIZE}.csv"
if not os.path.isfile(fn):
    # only load the gym-stuff if we need it to keep the requirements smaller.
    import gym_2048
    import gym
    from multiprocessing import Pool

    algo_repeat = list(product(ALGO, range(RUNS_PER_ALGO)))
    # The context manager shuts the worker processes down once map() has returned.
    with Pool(16) as pool:
        lines = pool.map(explore, algo_repeat)
    os.makedirs(os.path.dirname(fn), exist_ok=True)
    with open(fn, "w") as f:
        f.write(f"x,y,line,algo,step,reward,zeros,sum,biggest,{FIELD_NAMES},{BIN_NAMES}\n")
        f.write("\n".join(chain(*lines)))
    df = pd.read_csv(fn)
    # Also save a version with only the first run of every algorithm. The name
    # is built explicitly: replacing "10" in `fn` would overwrite the full
    # trace file whenever RUNS_PER_ALGO does not contain "10".
    single_run_fn = f"data/2048/trace1_{FIELD_SIZE}.csv"
    df1 = df[df["line"].str.endswith("-0")]
    if single_run_fn != fn:
        df1.to_csv(single_run_fn, index=False)
else:
    df = pd.read_csv(fn)
df  # beautiful ipython display instead of ascii-plot that you get with print
| x | y | line | algo | step | reward | zeros | sum | biggest | a0 | ... | cnt4 | cnt8 | cnt16 | cnt32 | cnt64 | cnt128 | cnt256 | cnt512 | cnt1024 | cnt2048 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 14 | random-0 | random | 0 | 0 | 14 | 4 | 2 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 13 | random-0 | random | 1 | 0 | 13 | 6 | 2 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 4 | 13 | random-0 | random | 2 | 4 | 13 | 8 | 4 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 4 | 12 | random-0 | random | 3 | 4 | 12 | 10 | 4 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 8 | 12 | random-0 | random | 4 | 8 | 12 | 12 | 4 | 0 | ... | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 17950 | 15448 | 1 | zeros-la-3-9 | zeros-la-3 | 897 | 15448 | 1 | 1980 | 1024 | 2 | ... | 4 | 1 | 2 | 2 | 1 | 0 | 1 | 1 | 1 | 0 |
| 17951 | 15456 | 1 | zeros-la-3-9 | zeros-la-3 | 898 | 15456 | 1 | 1982 | 1024 | 2 | ... | 2 | 2 | 2 | 2 | 1 | 0 | 1 | 1 | 1 | 0 |
| 17952 | 15460 | 1 | zeros-la-3-9 | zeros-la-3 | 899 | 15460 | 1 | 1984 | 1024 | 4 | ... | 3 | 2 | 2 | 2 | 1 | 0 | 1 | 1 | 1 | 0 |
| 17953 | 15460 | 0 | zeros-la-3-9 | zeros-la-3 | 900 | 15460 | 0 | 1986 | 1024 | 4 | ... | 3 | 2 | 2 | 2 | 1 | 0 | 1 | 1 | 1 | 0 |
| 17954 | 15464 | 0 | zeros-la-3-9 | zeros-la-3 | 901 | 15464 | 0 | 1988 | 1024 | 4 | ... | 4 | 2 | 2 | 2 | 1 | 0 | 1 | 1 | 1 | 0 |
17955 rows × 37 columns
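In addition to the raw field values (a0-a15), I extended the data with the cumulative reward, the number of empty cells (zeros), the sum of all tiles, the biggest tile, and the number of tiles per value (cnt0 ... cnt2048).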
Project your data into a 2D space. Try multiple (3+) projection methods (e.g., t-SNE, UMAP, MDS, PCA, ICA, other methods) with different settings and compare them.
Make sure that all additional dependencies are included when submitting.
# This is the plotting-function required in ex3, but we define it here so we can
# already visualize it here
# One marker symbol per algorithm: circle, cross, triangle, square, star
MARKERS = {algo: symbol for algo, symbol in zip(ALGO, "oX^s*")}
# One line colour per algorithm, in the same order as ALGO
COLORS = {algo: colour for algo, colour in zip(ALGO, ("green", "yellow", "red", "purple", "blue"))}
def plot_lines(xy, label):
    """Draw one path per game through the 2D points *xy*.

    The rows of the global ``df`` are grouped by their ``line`` column; each
    group's index is used to select its points from *xy*, so *xy* must hold
    one 2D point per row of ``df``. Marker and colour come from ``MARKERS``
    and ``COLORS`` via the group's ``algo`` value, and only the first and last
    point of a path get a marker. The figure title is *label* combined with
    the global ``DATA_NAME``.
    """
    traces = [group for _, group in df.groupby("line", group_keys=False)]
    plt.figure(figsize=(20, 10))
    # the more paths are drawn, the more transparent each of them becomes
    opacity = 1 / np.sqrt(len(traces))
    labelled = set()
    for trace in traces[::-1]:
        xs, ys = xy[trace.index].T
        algo_name = trace["algo"].iloc[0]
        # only the first path of every algorithm gets a legend entry
        legend_label = None if algo_name in labelled else algo_name
        labelled.add(algo_name)
        plt.plot(
            xs,
            ys,
            marker=MARKERS[algo_name],
            markersize=10,
            markevery=[0, -1],
            c=COLORS[algo_name],
            alpha=opacity,
            linewidth=1,
            label=legend_label,
        )
    plt.legend()
    plt.title(label + " + " + DATA_NAME)
    plt.show()
DATA_NAME = "raw data"
# Column x holds the cumulative reward and column y the number of empty cells
# (see the CSV header), so the plot shows zeros over reward, not over step.
plot_lines(df[["x", "y"]].to_numpy(), "zeros over reward")
# Rather bad attempts first, including multiple metrics, unfold cell output to see all failed but interesting-looking attempts
TSNE_METRICS = ['euclidean', 'manhattan', 'hamming']
UMAP_METRICS = ["euclidean", "manhattan", "chebyshev", "minkowski", "canberra", "braycurtis", "cosine", "correlation", "hamming", "jaccard", "dice", "kulsinski", "ll_dirichlet", "hellinger", "rogerstanimoto", "sokalmichener", "sokalsneath", "yule"]
df = pd.read_csv(fn)
# `import sklearn` alone does not guarantee that the decomposition submodule
# is loaded, so import it explicitly before using FastICA / PCA below.
import sklearn.decomposition

# try visualizations for fields, log-fields and bins with TSNE, PCA and ICA
# (DATA_NAME is a global on purpose: plot_lines() reads it for the title)
for DATA_NAME, DATA in (("FIELD", FIELD_), ("FIELD_log", FIELD_), ("BIN", BIN_)):
    xy = df[DATA].to_numpy()
    if DATA_NAME == "FIELD_log":
        # +1 keeps empty cells (0) finite under the logarithm
        xy = np.log2(xy + 1)
    for metric in TSNE_METRICS:
        try:
            plot_lines(TSNE(metric=metric).fit(xy), "tsne " + metric)
        except Exception as err:
            # Best effort: report the failing metric and continue with the
            # next one, but let KeyboardInterrupt/SystemExit propagate.
            print(f"tsne {metric} failed: {err!r}")
    # for metric in UMAP_METRICS: # didn't yield good results so it's commented out
    #     plot_lines(UMAP(metric=metric).fit_transform(xy), "umap "+metric)
    plot_lines(sklearn.decomposition.FastICA(n_components=2).fit_transform(xy), "ICA")
    plot_lines(sklearn.decomposition.PCA(n_components=2).fit_transform(xy), "PCA")